* Root folder of the project data
global data ".../data"


* Load the respondent's own (dccont) and employer (dcbene) contributions to
* DC accounts, together with earnings, age and interview year, from the RAND file
use hhid pn hhidpn ragender inw* r*iearn r*dccont* r*dcbene* r*agey_b r*iwendy ///
	using "$data/randhrs1992_2018v1", clear

* Move the wave number to the end of every name so reshape can read it as j
* NOTE(review): only waves 1-13 are renamed. If the 1992-2018 file also carries
* wave-14 variables they stay in wide form and are not analysed; confirm intended
forvalues w = 1/13 {
	rename r`w'iearn earn`w'
	rename r`w'agey_b age`w'
	rename r`w'iwendy iwyear`w'
	* Plans 1 to 3, own and employer side: rWstubP becomes stubP_W
	foreach stub in dccont dcbene {
		forvalues plan = 1/3 {
			rename r`w'`stub'`plan' `stub'`plan'_`w'
		}
	}
}

* Wide to long: one row per hhidpn and wave
reshape long ///
	earn ///
	dccont1_ dccont2_ dccont3_ ///
	dcbene1_ dcbene2_ dcbene3_ ///
	age iwyear inw, ///
	i(hhidpn) j(wave)

* The 4th plan is only asked in certain waves and is seldom populated, so the
* remaining wide variables that start with r and end in 4 are discarded
drop r*4

* Respondent (dccont) and employer (dcbene) contributions, plans 1 to 3.
* The special-missing codes .w .s .b .q .k are recoded to zero; any other
* missing value is left as is and therefore makes the sums below missing.
foreach amt of varlist dccont1_ dccont2_ dccont3_ dcbene1_ dcbene2_ dcbene3_ {
	replace `amt' = 0 if inlist(`amt', .w, .s, .b, .q, .k)
}

* Totals across the three plans
gen rcontrib = dccont1_ + dccont2_ + dccont3_
gen econtrib = dcbene1_ + dcbene2_ + dcbene3_

* Contribution rates relative to earnings: rrate (respondent), erate (employer)
* and their sum trate. Zero or missing earnings give a missing rate.
foreach who in r e {
	gen `who'rate = `who'contrib/earn
}
gen trate = rrate + erate

* Median respondent and total rate by age.
* Stata orders missing above every number, so rrate>0 and age>=48 do not by
* themselves filter out missing values.
tabstat rrate trate if rrate>0 & age>=48, statistics(p50) by(age)

*------------------------------------------------------------------------------*
* Match function 
*------------------------------------------------------------------------------*

*Bins of R's contribution rate
* 21 bins of width .005, labelled by their lower edge: .005, .010, ..., .105.
* Rates below .005, rates of .11 or more, and missing rates get no bin.
*
* The loop runs over an integer index and derives each edge as k/200 instead
* of stepping a fractional counter (.005(.005).105). A fractional counter can
* accumulate binary rounding error, so the stored labels might not equal the
* decimal literals they are later compared with (for example binrrate==.065),
* neighbouring edges might not coincide, and the last bin might not be visited.
* Each k/200 is a single division, so it equals the matching decimal literal
* and the upper edge of one bin is exactly the lower edge of the next.
gen double binrrate = .
forvalues k = 1/21 {
	replace binrrate = `k'/200 if rrate >= `k'/200 & rrate < (`k'+1)/200
}

*Median employer contribution rate within each bin
* (observations without a bin form their own group here; they still have a
* missing binrrate and are excluded from the regression by binrrate<.07)
bysort binrrate: egen mederate = median(erate)

* Linear relation between the bin median employer rate and the bin label
* for bins below .07, estimated on the observation-level data
reg mederate binrrate if binrrate>0 & binrrate<.07

* Distribution of the employer rate for respondent rates between .07 and .1
sum erate if rrate>.07 & rrate<.1, d

* Reduce the data to one row per contribution-rate bin
keep binrrate mederate
drop if missing(binrrate)
duplicates drop

* Fitted employer rate: linear in the bin label.
* NOTE(review): .011 and .39 are hard-coded; presumably rounded estimates
* from a regression of mederate on binrrate. Confirm they are still current.
gen fiterate = .011 + .39*binrrate

* Cap the schedule: every bin above .065 gets the fitted value of the .065 bin.
* binrrate is a double built from fractional steps, so it is compared at float
* precision; an exact == .065 can fail on a last-digit rounding difference,
* which would leave temp2 missing and wipe out fiterate above the cap.
gen temp = fiterate if float(binrrate) == float(.065)
egen temp2 = max(temp)
drop temp
replace fiterate = temp2 if float(binrrate) > float(.065)
* temp2 (the cap value) is left in the dataset, as before
